"""
数据计算-单词计数统计
"""
from pyspark import SparkConf, SparkContext
import os
# Point PySpark at the Python interpreter it should use (machine-specific path).
os.environ['PYSPARK_PYTHON'] = "D:/dev/python/python310/python.exe"
conf = SparkConf().setMaster("local[*]").setAppName("wordcount")
sc = SparkContext(conf=conf)
try:
    # Read the input data; each RDD element is one line of the file.
    rdd = sc.textFile("hello.txt")
    # Word count: split each line into words, pair every word with 1,
    # then sum the 1s per word.
    # split() with no argument splits on any run of whitespace and drops
    # empty tokens, so repeated spaces, tabs and blank lines do not produce
    # a spurious '' "word" in the result (split(" ") would).
    rdd2 = (
        rdd.flatMap(lambda line: line.split())
        .map(lambda word: (word, 1))
        .reduceByKey(lambda x, y: x + y)
    )
    # Output the result as a list of (word, count) tuples.
    # print(rdd.collect())  # uncomment to inspect the raw input lines
    print(rdd2.collect())
finally:
    # Always stop the SparkContext, even if the job above fails.
    sc.stop()